Project description

Effect of cohesin looping on genome architecture, from the perspective of the nuclear lamina.

Introduction

Look whether LAD border class change after protein depletions, to prove the point that CTCF and cohesin are not involved in LAD border positioning. Or to which extent they are.

Method

Use HMM models of the various experiments, and compare LAD border positioning with the LAD borders defined in the parental data.

Set-up

Set the parameters and list the data.

library(GenomicRanges)
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
library(rtracklayer)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::collapse()   masks IRanges::collapse()
## x dplyr::combine()    masks BiocGenerics::combine()
## x dplyr::desc()       masks IRanges::desc()
## x tidyr::expand()     masks S4Vectors::expand()
## x dplyr::filter()     masks stats::filter()
## x dplyr::first()      masks S4Vectors::first()
## x dplyr::lag()        masks stats::lag()
## x ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## x purrr::reduce()     masks GenomicRanges::reduce(), IRanges::reduce()
## x dplyr::rename()     masks S4Vectors::rename()
## x dplyr::slice()      masks IRanges::slice()
library(ggbeeswarm)
library(ggplot2)
library(broom)

# Prepare output 
output_dir <- "ts220117_LAD_border_changes"
dir.create(output_dir, showWarnings = FALSE)

# Load input
input_dir <- "ts220113_CTCF_enrichment_at_LAD_borders"
LADs <- readRDS(file.path(input_dir, "LADs_pA.rds"))
LAD_borders <- readRDS(file.path(input_dir, "LAD_borders_pA.rds"))

input_dir <- "ts220113_effect_of_CTCF_depletion_on_LAD_borders"
bin_size <- readRDS(file.path(input_dir, "bin_size.rds"))
metadata_damid <- readRDS(file.path(input_dir, "metadata_damid.rds"))
damid <- readRDS(file.path(input_dir, "damid.rds"))

# Select input
LADs <- LADs[[1]]
LAD_borders <- LAD_borders[[1]]
LAD_borders <- LAD_borders[LAD_borders$ovl_gene == F]

bins <- damid
mcols(bins) <- NULL

Knitr set-up.

library(knitr)
opts_chunk$set(fig.width = 8, fig.height = 3.5, cache = T,
               dev=c('png', 'pdf'), fig.path = file.path(output_dir, "figures/")) 

pdf.options(useDingbats = FALSE)

Functions.

LoadBed <- function(metadata, column, reduce = F, size_filter = F) {
  # Load LADs as GRangesList from metadata object
  bed <- GRangesList()
  for (i in 1:nrow(metadata)) {
    f.name <- (metadata %>% pull(column))[i]
    f.import <- import(f.name)
    if (reduce) {
      f.import <- GenomicRanges::reduce(f.import, min.gapwidth = 50e3)
    }
    if (size_filter) {
      f.import <- f.import[width(f.import) > 50e3]
    }
    f.import$sample <- metadata$sample[i]
    f.import <- f.import[seqnames(f.import) %in% c(paste0("chr", 1:22), "chrX")]
    bed <- c(bed, GRangesList(f.import))
  }
  names(bed) <- metadata$sample
  bed
}

LADBorders <- function(LADs, bins, min.distance = 0) {
  # Given a (named) GRangesList of LADs, return a GRangesList with borders
  # Strand information defines left (+) or right (-) border
  # Also, require complete bin object to determine chromosome start and end
  
  cells <- names(LADs)
  LAD.borders <- GRangesList()
  
  for (cell in cells ) {
    
    # Get LADs and bins for cell
    cell.LADs <- LADs[[cell]]
    cell.bins <- bins
    
    # Remove small iLADs and remove small LADs
    cell.LADs <- GenomicRanges::reduce(cell.LADs, min.gapwidth = min.distance)
    cell.LADs <- cell.LADs[width(cell.LADs) > min.distance]
    
    # Get borders
    cell.borders <- c(GRanges(seqnames = seqnames(cell.LADs),
                              ranges = IRanges(start = start(cell.LADs),
                                               end = start(cell.LADs)),
                              strand = "+"),
                      GRanges(seqnames = seqnames(cell.LADs),
                              ranges = IRanges(start = end(cell.LADs),
                                               end = end(cell.LADs)),
                              strand = "-"))
    cell.borders <- sort(cell.borders, ignore.strand = T)
    
    # Get start and end of the chromosome and filter overlapping borders
    chromosome.ends <- c(cell.bins[! duplicated(as.character(seqnames(cell.bins)))],
                         rev(cell.bins)[! duplicated(as.character(seqnames(rev(cell.bins))))])
    chromosome.ends <- flank(chromosome.ends, 5000, both = T)
    
    cell.borders <- cell.borders[! overlapsAny(cell.borders, 
                                               chromosome.ends, 
                                               ignore.strand = T)]
    
    cell.borders$cell <- cell
    
    LAD.borders <- c(LAD.borders, GRangesList(cell.borders))
  }
  
  names(LAD.borders) <- cells
  LAD.borders
  
}

grMid = function(gr) {
  start(gr) <- end(gr) <- rowMeans(cbind(start(gr), end(gr)))
  gr
}

1. Distance to closest border

First, load LADs.

# Prepare LAD metadata
metadata_lads <- metadata_damid %>% 
  filter(! condition %in% c("CTCF", "CTCFNQ")) %>%
  mutate(lad_file = file.path(paste0("../results_NQ/HMM/bin-", bin_size),
                              str_replace(file, ".norm.txt.gz", "_AD.bed.gz")))

LAD_list <- LoadBed(metadata_lads, "lad_file", reduce = T, size_filter = T)


# Prepare borders
LAD_border_list <- LADBorders(LAD_list, bins)

I will start simple: determine the distance from the selected borders to the closest border.

Then, I will plot the distance to the nearest border. However, I want to determine whether the border is displaced. If the entire LAD is lost, the nearest border will be “wrong”. To prevent this, I will only look in a 100 kb window from the parental border, and disregard nearest borders that are further away than that.

In this way, I can specifically ask whether CTCF depletion (or any other depletion) results in a shift of the LAD border. Perhaps not ideal, but together with previous analyses of average signal at LAD borders, it should be a convincing analysis. I hope.

# For all samples, determine distance to nearest border
border_distance <- tibble()

for (sample in metadata_lads$sample) {
  
  # Get borders for sample
  LAD_sample <- LAD_list[[sample]]
  LAD_border_sample <- LAD_border_list[[sample]]
  
  # Distance to nearest border
  dis <- as_tibble(distanceToNearest(LAD_borders, LAD_border_sample, ignore.strand = T)) %>%
    rename_all(~ c("border_idx", "sample_idx", "distance"))
  
  # Filter and add metadata
  dis <- dis %>%
    arrange(sample_idx, distance) %>%
    filter(! duplicated(sample_idx)) %>%
    mutate(sample = sample) %>%
    mutate(border_strand = as.character(strand(LAD_borders))[border_idx],
           sample_strand = as.character(strand(LAD_border_sample))[sample_idx],
           border_within_lad = overlapsAny(LAD_borders[border_idx], LAD_sample,
                                           ignore.strand = T),
           sample_within_lad = overlapsAny(LAD_border_sample[sample_idx], LADs,
                                           ignore.strand = T)) %>%
    mutate(distance = case_when(sample_within_lad == T ~ distance,
                                T ~ - distance))
  
  # Add to all border distances
  border_distance <- bind_rows(border_distance, 
                               dis)
  
}

# Prepare for plotting
border_distance <- border_distance %>%
  separate(sample, c("condition", "timepoint"), remove = F) %>%
  mutate(condition = factor(condition, levels(metadata_lads$condition)),
         timepoint = factor(timepoint, levels(metadata_lads$timepoint)),
         distance_kb = distance / 1e3,
         distance_kb = (distance_kb %/% 10) * 10) %>%
  filter(timepoint != "96h") %>%
  mutate(CTCF = LAD_borders$CTCF[border_idx],
         CTCF_strand = LAD_borders$CTCF_strand[border_idx],
         CTCF_count = LAD_borders$CTCF.count[border_idx],
         CTCF_strand = factor(CTCF_strand, c("outwards", "inwards", "ambiguous", "nonCTCF"))) %>%
  mutate(CTCF_count = case_when(CTCF_count >= 3 ~ ">=3",
                                CTCF_count == 2 ~ "2",
                                CTCF_count == 1 ~ "1",
                                T ~ "nonCTCF"),
         CTCF_count = factor(CTCF_count,
                             levels = c("nonCTCF", "1", "2", ">=3")))

# Plot this
border_distance %>%
  ggplot(aes(x = distance_kb, col = timepoint)) +
  stat_ecdf(geom = "line") +
  annotate("rect", xmin = 0, xmax = Inf, ymin = -Inf, ymax = Inf, 
           fill = "grey", alpha = 0.3) +  
  geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
  geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
  facet_grid(CTCF ~ condition) +
  coord_cartesian(xlim = c(-100, 100)) +
  xlab("Distance to LAD border (kb)") +
  ylab("Cum density") +
  scale_color_brewer(palette = "Set1") +
  theme_bw() +
  theme(aspect.ratio = 1, 
        axis.text.x = element_text(angle = 90, hjust = 1))

border_distance %>%
  filter(condition == "CTCFEL") %>%
  ggplot(aes(x = distance_kb, col = timepoint)) +
  stat_ecdf(geom = "line") +
  annotate("rect", xmin = 0, xmax = Inf, ymin = -Inf, ymax = Inf, 
           fill = "grey", alpha = 0.3) +  
  geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
  geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
  facet_grid(CTCF ~ condition) +
  coord_cartesian(xlim = c(-100, 100)) +
  xlab("Distance to LAD border (kb)") +
  ylab("Cum density") +
  scale_color_brewer(palette = "Set1") +
  theme_bw() +
  theme(aspect.ratio = 1, 
        axis.text.x = element_text(angle = 90, hjust = 1))

border_distance %>%
  filter(condition %in% c("RAD21", "WAPL", "CTCFWAPL")) %>%
  ggplot(aes(x = distance_kb, col = timepoint)) +
  stat_ecdf(geom = "line") +
  annotate("rect", xmin = 0, xmax = Inf, ymin = -Inf, ymax = Inf, 
           fill = "grey", alpha = 0.3) +  
  geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
  geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
  facet_grid(CTCF ~ condition) +
  coord_cartesian(xlim = c(-100, 100)) +
  xlab("Distance to LAD border (kb)") +
  ylab("Cum density") +
  scale_color_brewer(palette = "Set1") +
  theme_bw() +
  theme(aspect.ratio = 1, 
        axis.text.x = element_text(angle = 90, hjust = 1))

# Plot this - assuming shift less than 100kb and removing PT
border_distance %>%
  filter(condition != "PT",
         distance_kb > -101, distance_kb < 101) %>%
  ggplot(aes(x = distance_kb, col = timepoint)) +
  stat_ecdf(geom = "line") +
  annotate("rect", xmin = 0, xmax = Inf, ymin = -Inf, ymax = Inf, 
           fill = "grey", alpha = 0.3) +  
  geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
  geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
  facet_grid(CTCF ~ condition) +
  coord_cartesian(xlim = c(-100, 100)) +
  xlab("Distance to LAD border (kb)") +
  ylab("Cum density") +
  scale_color_brewer(palette = "Set1") +
  theme_bw() +
  theme(aspect.ratio = 1, 
        axis.text.x = element_text(angle = 90, hjust = 1))

border_distance %>%
  filter(condition == "CTCFEL",
         distance_kb > -101, distance_kb < 101) %>%
  ggplot(aes(x = distance_kb, col = timepoint)) +
  stat_ecdf(geom = "line") +
  annotate("rect", xmin = 0, xmax = Inf, ymin = -Inf, ymax = Inf, 
           fill = "grey", alpha = 0.3) +  
  geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
  geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
  facet_grid(CTCF ~ condition) +
  coord_cartesian(xlim = c(-100, 100)) +
  xlab("Distance to LAD border (kb)") +
  ylab("Cum density") +
  scale_color_brewer(palette = "Set1") +
  theme_bw() +
  theme(aspect.ratio = 1, 
        axis.text.x = element_text(angle = 90, hjust = 1))

border_distance %>%
  filter(condition %in% c("RAD21", "WAPL", "CTCFWAPL"),
         distance_kb > -101, distance_kb < 101) %>%
  ggplot(aes(x = distance_kb, col = timepoint)) +
  stat_ecdf(geom = "line") +
  annotate("rect", xmin = 0, xmax = Inf, ymin = -Inf, ymax = Inf, 
           fill = "grey", alpha = 0.3) +  
  geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
  geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
  facet_grid(CTCF ~ condition) +
  coord_cartesian(xlim = c(-100, 100)) +
  xlab("Distance to LAD border (kb)") +
  ylab("Cum density") +
  scale_color_brewer(palette = "Set1") +
  theme_bw() +
  theme(aspect.ratio = 1, 
        axis.text.x = element_text(angle = 90, hjust = 1))

# Plot this - assuming shift less than 100kb and removing PT, with CTCF strand information
border_distance %>%
  filter(condition != "PT",
         distance_kb > -101, distance_kb < 101) %>%
  ggplot(aes(x = distance_kb, col = timepoint)) +
  stat_ecdf(geom = "line") +
  annotate("rect", xmin = 0, xmax = Inf, ymin = -Inf, ymax = Inf, 
           fill = "grey", alpha = 0.3) +  
  geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
  geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
  facet_grid(CTCF_strand ~ condition) +
  coord_cartesian(xlim = c(-100, 100)) +
  xlab("Distance to LAD border (kb)") +
  ylab("Cum density") +
  scale_color_brewer(palette = "Set1") +
  theme_bw() +
  theme(aspect.ratio = 1, 
        axis.text.x = element_text(angle = 90, hjust = 1))

# Plot this - assuming shift less than 100kb and removing PT, with CTCF count information
border_distance %>%
  filter(condition == "CTCFEL",
         distance_kb > -101, distance_kb < 101) %>%
  ggplot(aes(x = distance_kb, col = timepoint)) +
  stat_ecdf(geom = "line") +
  annotate("rect", xmin = 0, xmax = Inf, ymin = -Inf, ymax = Inf, 
           fill = "grey", alpha = 0.3) +  
  geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
  geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
  facet_grid(CTCF_count ~ condition) +
  coord_cartesian(xlim = c(-100, 100)) +
  xlab("Distance to LAD border (kb)") +
  ylab("Cum density") +
  scale_color_brewer(palette = "Set1") +
  theme_bw() +
  theme(aspect.ratio = 1, 
        axis.text.x = element_text(angle = 90, hjust = 1))

border_distance %>%
  filter(condition != "PT",
         distance_kb > -101, distance_kb < 101) %>%
  ggplot(aes(x = distance_kb, col = timepoint)) +
  stat_ecdf(geom = "line") +
  annotate("rect", xmin = 0, xmax = Inf, ymin = -Inf, ymax = Inf, 
           fill = "grey", alpha = 0.3) +  
  geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
  geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
  facet_grid(CTCF_count ~ condition) +
  coord_cartesian(xlim = c(-100, 100)) +
  xlab("Distance to LAD border (kb)") +
  ylab("Cum density") +
  scale_color_brewer(palette = "Set1") +
  theme_bw() +
  theme(aspect.ratio = 1, 
        axis.text.x = element_text(angle = 90, hjust = 1))

# Plot this - assuming shift less than 100kb and removing PT, by border type
border_distance %>%
  filter(condition != "PT",
         distance_kb > -101, distance_kb < 101) %>%
  ggplot(aes(x = distance_kb, col = CTCF)) +
  stat_ecdf(geom = "line") +
  annotate("rect", xmin = 0, xmax = Inf, ymin = -Inf, ymax = Inf, 
           fill = "grey", alpha = 0.3) +  
  geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
  geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
  facet_grid(timepoint ~ condition) +
  coord_cartesian(xlim = c(-100, 100)) +
  xlab("Distance to LAD border (kb)") +
  ylab("Cum density") +
  scale_color_brewer(palette = "Set1") +
  theme_bw() +
  theme(aspect.ratio = 1, 
        axis.text.x = element_text(angle = 90, hjust = 1))

This looks sort of as expected. Looks good. I can use these figures in the manuscript.

2. Linear models of border slopes

A different approach - use the border to determine a linear slope and compare slope / intercept.

# Get border bins
max_distance <- 3e4

bins_mid <- grMid(bins)

dis <- as_tibble(distanceToNearest(bins_mid, LAD_borders, ignore.strand = T)) %>%
  rename_at(1:2, ~ c("bin_idx", "border_idx")) %>%
  filter(distance < max_distance) %>%
  mutate(LAD_ovl = overlapsAny(bins_mid[bin_idx], LADs),
         distance = case_when(LAD_ovl == T ~ distance,
                              T ~ -distance),
         distance = distance / 1e3,
         distance = (distance %/% 10) * 10)

# Add DamID values
dis <- bind_cols(dis, 
                 as_tibble(mcols(damid))[dis$bin_idx, ] %>%
                   dplyr::select(one_of(metadata_lads$sample))) %>%
  arrange(border_idx, distance) %>%
  dplyr::select(-bin_idx, -LAD_ovl) %>%
  gather(key, value, -border_idx, -distance) %>%
  drop_na()

# Linear models
border_models <- dis %>%
  group_by(key, border_idx) %>%
  do(border_fit = tidy(lm(value ~ distance, data = .))) %>% 
  unnest(border_fit)
# Prepare for plotting
border_models_plot <- border_models %>%
  mutate(term = str_remove_all(term, "\\(|\\)")) %>%
  dplyr::select(key, border_idx, term, estimate) %>%
  spread(term, estimate) %>%
  separate(key, c("condition", "timepoint"), remove = F) %>%
  mutate(condition = factor(condition, levels(metadata_lads$condition)),
         timepoint = factor(timepoint, levels(metadata_lads$timepoint))) %>%
  mutate(CTCF = LAD_borders$CTCF[border_idx]) %>%
  filter(timepoint != "96h")

# Plot models
border_models_plot %>%
  ggplot(aes(x = timepoint, y = distance, col = CTCF, group = interaction(timepoint, CTCF))) +
  geom_quasirandom(dodge.width = 0.8) +
  geom_hline(yintercept = 0, col = "grey", linetype = "dashed") +
  geom_boxplot(col = "black", outlier.shape = NA, fill = NA) +
  facet_grid(. ~ condition, scale = "free_x", space = "free_x") +
  scale_color_brewer(palette = "Set1") +
  theme_bw() + 
  theme(#aspect.ratio = 3, 
        axis.text.x = element_text(angle = 90, hjust = 1))
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## Warning: Removed 3 rows containing missing values (position_quasirandom).
## Warning: Removed 1 rows containing missing values (position_quasirandom).

## Warning: Removed 1 rows containing missing values (position_quasirandom).
## Warning: Removed 2 rows containing missing values (position_quasirandom).

border_models_plot %>%
  ggplot(aes(x = timepoint, y = Intercept, col = CTCF, group = interaction(timepoint, CTCF))) +
  geom_quasirandom(dodge.width = 0.8) +
  geom_hline(yintercept = 0, col = "grey", linetype = "dashed") +
  geom_boxplot(col = "black", outlier.shape = NA, fill = NA) +
  facet_grid(. ~ condition, scale = "free_x", space = "free_x") +
  scale_color_brewer(palette = "Set1") +
  theme_bw() + 
  theme(#aspect.ratio = 3, 
        axis.text.x = element_text(angle = 90, hjust = 1))

border_models_plot %>%
  mutate(timepoint = factor(timepoint, rev(levels(metadata_lads$timepoint)))) %>%
  gather(parameter, parameter_value, distance, Intercept) %>%
  ggplot(aes(x = timepoint, y = parameter_value, 
             col = CTCF, group = interaction(timepoint, CTCF))) +
  geom_quasirandom(dodge.width = 0.8) +
  geom_hline(yintercept = 0, col = "grey", linetype = "dashed") +
  geom_boxplot(col = "black", outlier.shape = NA, fill = NA) +
  coord_flip() +
  facet_grid(condition ~ parameter, scale = "free", space = "free_y") +
  scale_color_brewer(palette = "Set1") +
  theme_bw() + 
  theme(#aspect.ratio = 1, 
        axis.text.x = element_text(angle = 90, hjust = 1))
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## Warning: Removed 3 rows containing missing values (position_quasirandom).
## Warning: Removed 1 rows containing missing values (position_quasirandom).

## Warning: Removed 1 rows containing missing values (position_quasirandom).
## Warning: Removed 2 rows containing missing values (position_quasirandom).

border_models_plot %>%
  mutate(CTCF = factor(CTCF, c("nonCTCF", "CTCF"))) %>%
  gather(parameter, parameter_value, distance, Intercept) %>%
  ggplot(aes(x = CTCF, y = parameter_value, 
             col = timepoint, group = interaction(CTCF, timepoint))) +
  geom_quasirandom(dodge.width = 0.8) +
  geom_hline(yintercept = 0, col = "grey", linetype = "dashed") +
  geom_boxplot(col = "black", outlier.shape = NA, fill = NA) +
  coord_flip() +
  facet_grid(condition ~ parameter, scale = "free", space = "free_y") +
  scale_color_brewer(palette = "Set1") +
  theme_bw() + 
  theme(#aspect.ratio = 1, 
    axis.text.x = element_text(angle = 90, hjust = 1))
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## Warning: Removed 3 rows containing missing values (position_quasirandom).
## Warning: Removed 1 rows containing missing values (position_quasirandom).

## Warning: Removed 1 rows containing missing values (position_quasirandom).
## Warning: Removed 2 rows containing missing values (position_quasirandom).

# Determine significance - between border classes
border_models_plot %>%
  group_by(condition, timepoint) %>%
  do(test_slope = tidy(wilcox.test(distance  ~ CTCF, data = .))) %>%
  unnest(test_slope) %>%
  mutate(padj = p.adjust(p.value),
         sign = padj < 0.05)
## # A tibble: 14 × 8
##    condition timepoint statistic  p.value method      alternative     padj sign 
##    <fct>     <fct>         <dbl>    <dbl> <chr>       <chr>          <dbl> <lgl>
##  1 PT        0h           160233 4.77e-16 Wilcoxon r… two.sided   5.24e-15 TRUE 
##  2 PT        24h          164092 3.07e-19 Wilcoxon r… two.sided   3.98e-18 TRUE 
##  3 CTCFEL    0h           152742 6.02e-11 Wilcoxon r… two.sided   4.81e-10 TRUE 
##  4 CTCFEL    6h           142146 2.57e- 5 Wilcoxon r… two.sided   5.15e- 5 TRUE 
##  5 CTCFEL    24h          138983 4.46e- 4 Wilcoxon r… two.sided   4.46e- 4 TRUE 
##  6 RAD21     0h           154708 3.44e-12 Wilcoxon r… two.sided   3.10e-11 TRUE 
##  7 RAD21     6h           148114 5.01e- 8 Wilcoxon r… two.sided   3.01e- 7 TRUE 
##  8 RAD21     24h          145961 6.45e- 7 Wilcoxon r… two.sided   3.22e- 6 TRUE 
##  9 WAPL      0h           165520 1.69e-20 Wilcoxon r… two.sided   2.36e-19 TRUE 
## 10 WAPL      6h           159255 2.74e-15 Wilcoxon r… two.sided   2.74e-14 TRUE 
## 11 WAPL      24h          150044 3.01e- 9 Wilcoxon r… two.sided   2.11e- 8 TRUE 
## 12 CTCFWAPL  0h           161698 9.83e-18 Wilcoxon r… two.sided   1.18e-16 TRUE 
## 13 CTCFWAPL  6h           144143 4.71e- 6 Wilcoxon r… two.sided   1.46e- 5 TRUE 
## 14 CTCFWAPL  24h          144382 3.66e- 6 Wilcoxon r… two.sided   1.46e- 5 TRUE
border_models_plot %>%
  group_by(condition, timepoint) %>%
  do(test_intercept = tidy(wilcox.test(Intercept  ~ CTCF, data = .))) %>%
  unnest(test_intercept) %>%
  mutate(padj = p.adjust(p.value),
         sign = padj < 0.05)
## # A tibble: 14 × 8
##    condition timepoint statistic  p.value method      alternative     padj sign 
##    <fct>     <fct>         <dbl>    <dbl> <chr>       <chr>          <dbl> <lgl>
##  1 PT        0h           124460 8.05e- 1 Wilcoxon r… two.sided   1   e+ 0 FALSE
##  2 PT        24h          137856 1.40e- 3 Wilcoxon r… two.sided   7.01e- 3 TRUE 
##  3 CTCFEL    0h           100857 7.58e- 7 Wilcoxon r… two.sided   7.58e- 6 TRUE 
##  4 CTCFEL    6h           121181 6.35e- 1 Wilcoxon r… two.sided   1   e+ 0 FALSE
##  5 CTCFEL    24h          122137 7.92e- 1 Wilcoxon r… two.sided   1   e+ 0 FALSE
##  6 RAD21     0h           108392 1.01e- 3 Wilcoxon r… two.sided   6.05e- 3 TRUE 
##  7 RAD21     6h           106406 1.95e- 4 Wilcoxon r… two.sided   1.55e- 3 TRUE 
##  8 RAD21     24h          104841 4.71e- 5 Wilcoxon r… two.sided   4.24e- 4 TRUE 
##  9 WAPL      0h            92079 6.11e-12 Wilcoxon r… two.sided   7.33e-11 TRUE 
## 10 WAPL      6h            70856 7.70e-31 Wilcoxon r… two.sided   1.00e-29 TRUE 
## 11 WAPL      24h           67776 2.31e-34 Wilcoxon r… two.sided   3.24e-33 TRUE 
## 12 CTCFWAPL  0h            99266 1.18e- 7 Wilcoxon r… two.sided   1.30e- 6 TRUE 
## 13 CTCFWAPL  6h           106397 1.94e- 4 Wilcoxon r… two.sided   1.55e- 3 TRUE 
## 14 CTCFWAPL  24h          111859 1.16e- 2 Wilcoxon r… two.sided   4.62e- 2 TRUE

This attempt was unsuccessful. Linear models at LAD borders are too unreliable.

Conclusion

Looks good. I can show that LAD border positioning is only mildly affected.

SessionInfo

sessionInfo()
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.3 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] knitr_1.33           broom_0.7.9          ggbeeswarm_0.6.0    
##  [4] forcats_0.5.1        stringr_1.4.0        dplyr_1.0.7         
##  [7] purrr_0.3.4          readr_2.0.0          tidyr_1.1.3         
## [10] tibble_3.1.6         ggplot2_3.3.5        tidyverse_1.3.1     
## [13] rtracklayer_1.50.0   GenomicRanges_1.42.0 GenomeInfoDb_1.26.7 
## [16] IRanges_2.24.1       S4Vectors_0.28.1     BiocGenerics_0.36.1 
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-7                matrixStats_0.60.0         
##  [3] fs_1.5.0                    lubridate_1.7.10           
##  [5] RColorBrewer_1.1-2          httr_1.4.2                 
##  [7] tools_4.0.5                 backports_1.2.1            
##  [9] bslib_0.2.5.1               utf8_1.2.2                 
## [11] R6_2.5.1                    vipor_0.4.5                
## [13] DBI_1.1.1                   colorspace_2.0-2           
## [15] withr_2.4.2                 tidyselect_1.1.1           
## [17] compiler_4.0.5              cli_3.1.0                  
## [19] rvest_1.0.1                 Biobase_2.50.0             
## [21] xml2_1.3.2                  DelayedArray_0.16.3        
## [23] labeling_0.4.2              sass_0.4.0                 
## [25] scales_1.1.1                digest_0.6.28              
## [27] Rsamtools_2.6.0             rmarkdown_2.11             
## [29] XVector_0.30.0              pkgconfig_2.0.3            
## [31] htmltools_0.5.1.1           MatrixGenerics_1.2.1       
## [33] highr_0.9                   dbplyr_2.1.1               
## [35] rlang_0.4.12                readxl_1.3.1               
## [37] rstudioapi_0.13             farver_2.1.0               
## [39] jquerylib_0.1.4             generics_0.1.0             
## [41] jsonlite_1.7.2              BiocParallel_1.24.1        
## [43] RCurl_1.98-1.3              magrittr_2.0.1             
## [45] GenomeInfoDbData_1.2.4      Matrix_1.3-4               
## [47] Rcpp_1.0.7                  munsell_0.5.0              
## [49] fansi_0.5.0                 lifecycle_1.0.1            
## [51] stringi_1.7.3               yaml_2.2.1                 
## [53] SummarizedExperiment_1.20.0 zlibbioc_1.36.0            
## [55] grid_4.0.5                  crayon_1.4.2               
## [57] lattice_0.20-44             Biostrings_2.58.0          
## [59] haven_2.4.1                 hms_1.1.0                  
## [61] pillar_1.6.4                codetools_0.2-18           
## [63] reprex_2.0.0                XML_3.99-0.6               
## [65] glue_1.5.0                  evaluate_0.14              
## [67] modelr_0.1.8                vctrs_0.3.8                
## [69] tzdb_0.1.2                  cellranger_1.1.0           
## [71] gtable_0.3.0                assertthat_0.2.1           
## [73] xfun_0.24                   GenomicAlignments_1.26.0   
## [75] beeswarm_0.4.0              ellipsis_0.3.2